# -*- coding: utf-8 -*-

# import akshare as ak, os, pandas as pd, time, tqdm

# OUT_DIR = './cn_annual_2003_2022'
# os.makedirs(OUT_DIR, exist_ok=True)

# stocks = ak.stock_zh_a_spot()['代码'].tolist()  # 当前 A 股
# records = []

# for code in tqdm.tqdm(stocks):
#     for year in range(2024, 2025):
#         try:
#             file_path = ak.stock_zh_a_report(file_dir=OUT_DIR,
#                                              symbol=code,
#                                              year=str(year))
#             if file_path:
#                 records.append({'ts_code': code, 'year': year, 'path': file_path})
#         except Exception as e:
#             continue
#         time.sleep(0.2)

# pd.DataFrame(records).to_csv('cn_annual_index.csv', index=False)

import os, time, pandas as pd, akshare as ak
from tqdm import tqdm

# Download A-share annual-report PDFs (fiscal years 2003-2022) into OUT_DIR and
# maintain a CSV index (`index_file`) that lets an interrupted run resume.
import requests  # hoisted: previously re-imported on every loop iteration

OUT_DIR = './cn_annual_2003_2022'
os.makedirs(OUT_DIR, exist_ok=True)

# 1. Current A-share spot list (original note: delisted stocks are excluded).
stock_df = ak.stock_zh_a_spot()
tickers = stock_df['代码'].tolist()

# 2. Load the existing index (resume checkpoint), if any.
index_file = 'cn_annual_index.csv'
done_df = None
if os.path.exists(index_file):
    try:
        # Force string dtype: otherwise pandas parses codes such as "000001"
        # as the integer 1 and the leading zeros are lost on rewrite.
        done_df = pd.read_csv(index_file,
                              dtype={'ts_code': str, 'code_year': str})
    except pd.errors.EmptyDataError:
        # A zero-byte index (e.g. left by an aborted write) means "nothing done".
        done_df = None
done_set = set(done_df['code_year']) if done_df is not None else set()

records = []      # rows added by this run
failures = 0      # (symbol, year) pairs skipped because of an error


def _save_index():
    """Write previously indexed rows plus this run's new rows to `index_file`.

    The file is written to a temporary path and then moved into place so an
    interruption during the write cannot leave a truncated index behind.
    Does nothing when this run has not produced any new record.
    """
    if not records:
        return
    new_df = pd.DataFrame(records)
    if done_df is not None:
        new_df = pd.concat([done_df, new_df], ignore_index=True)
    tmp_file = index_file + '.tmp'
    new_df.to_csv(tmp_file, index=False)
    os.replace(tmp_file, index_file)


# 3. Fetch the annual-report URL per (symbol, year) and download the PDF.
# NOTE(review): confirm that `ak.stock_zh_a_report_cninfo` exists in the
# installed AkShare version and returns a plain URL string.
saved_count = 0   # how many entries of `records` are already on disk
try:
    for ts_code in tqdm(tickers, desc='tickers'):
        # Original note: AkShare expects the 6-digit code without a suffix.
        symbol = ts_code.split('.')[0]
        for year in range(2003, 2023):
            key = f"{symbol}_{year}"
            if key in done_set:
                continue  # already downloaded in a previous run
            try:
                pdf_url = ak.stock_zh_a_report_cninfo(symbol=symbol, year=str(year))
                # Expected shape: http://static.cninfo.com.cn/finalpage/...pdf
                if not pdf_url:
                    continue
                fname = f"{symbol}_{year}.pdf"
                fpath = os.path.join(OUT_DIR, fname)

                r = requests.get(pdf_url, timeout=15)
                r.raise_for_status()
                with open(fpath, 'wb') as f:
                    f.write(r.content)

                records.append({'ts_code': symbol, 'year': year,
                                'path': os.path.abspath(fpath), 'code_year': key})
                done_set.add(key)
            except Exception as e:
                # Best effort: the remote occasionally answers 404. Skip this
                # report, but leave a trace instead of failing silently.
                failures += 1
                tqdm.write(f"skip {key}: {e!r}")
            finally:
                # Throttle after every remote call, including failed or empty
                # ones, so error paths do not bypass the rate limit.
                time.sleep(0.3)
        # Checkpoint after each ticker that produced something new.
        if len(records) > saved_count:
            _save_index()
            saved_count = len(records)
finally:
    # 4. Persist the index even on Ctrl-C or an unexpected error, so the work
    # done so far is not lost and the next run can resume.
    if len(records) > saved_count:
        _save_index()

if failures:
    print('skipped', failures, 'report(s) because of errors')
print('✅ 完成！共新增', len(records), '份年报')
